{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "3700F0383ACD456594322246327904AB",
    "jupyter": {},
    "mdEditEnable": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "source": [
    " # 深度卷积神经网络（AlexNet） "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "E59CD3957F874D2D9FB1B0D7C1A3566C",
    "jupyter": {},
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "source": [
    "LeNet:  在大的真实数据集上的表现并不尽如⼈意。     \n",
    "1.神经网络计算复杂。  \n",
    "2.还没有⼤量深⼊研究参数初始化和⾮凸优化算法等诸多领域。  \n",
    "  \n",
    "机器学习的特征提取:手工定义的特征提取函数  \n",
    "神经网络的特征提取：通过学习得到数据的多级表征，并逐级表⽰越来越抽象的概念或模式。  \n",
    "  \n",
    "神经网络发展的限制:数据、硬件"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "894A96EBC1404E008C042A83D1A053F9",
    "jupyter": {},
    "mdEditEnable": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "source": [
    "### AlexNet\n",
    "首次证明了学习到的特征可以超越⼿⼯设计的特征，从而⼀举打破计算机视觉研究的前状。   \n",
    "**特征：**\n",
    "1. 8层变换，其中有5层卷积和2层全连接隐藏层，以及1个全连接输出层。\n",
    "2. 将sigmoid激活函数改成了更加简单的ReLU激活函数。\n",
    "3. 用Dropout来控制全连接层的模型复杂度。\n",
    "4. 引入数据增强，如翻转、裁剪和颜色变化，从而进一步扩大数据集来缓解过拟合。\n",
    "\n",
    "![Image Name](https://cdn.kesci.com/upload/image/q5kv4gpx88.png?imageView2/0/w/640/h/640)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "id": "DD41B540CFAB491B9D43105C3D90647C",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "#目前GPU算力资源预计17日上线，在此之前本代码只能使用CPU运行。\n",
    "#考虑到本代码中的模型过大，CPU训练较慢，\n",
    "#我们还将代码上传了一份到 https://www.kaggle.com/boyuai/boyu-d2l-modernconvolutionalnetwork\n",
    "#如希望提前使用gpu运行请至kaggle。\n",
    "\n",
    "\n",
    "import time\n",
    "import torch\n",
    "from torch import nn, optim\n",
    "import torchvision\n",
    "import numpy as np\n",
    "import sys\n",
    "sys.path.append(\"/home/kesci/input/\") \n",
    "import d2lzh1981 as d2l\n",
    "import os\n",
    "import torch.nn.functional as F\n",
    "\n",
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
    "\n",
    "class AlexNet(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(AlexNet, self).__init__()\n",
    "        self.conv = nn.Sequential(\n",
    "            nn.Conv2d(1, 96, 11, 4), # in_channels, out_channels, kernel_size, stride, padding\n",
    "            nn.ReLU(),\n",
    "            nn.MaxPool2d(3, 2), # kernel_size, stride\n",
    "            # 减小卷积窗口，使用填充为2来使得输入与输出的高和宽一致，且增大输出通道数\n",
    "            nn.Conv2d(96, 256, 5, 1, 2),\n",
    "            nn.ReLU(),\n",
    "            nn.MaxPool2d(3, 2),\n",
    "            # 连续3个卷积层，且使用更小的卷积窗口。除了最后的卷积层外，进一步增大了输出通道数。\n",
    "            # 前两个卷积层后不使用池化层来减小输入的高和宽\n",
    "            nn.Conv2d(256, 384, 3, 1, 1),\n",
    "            nn.ReLU(),\n",
    "            nn.Conv2d(384, 384, 3, 1, 1),\n",
    "            nn.ReLU(),\n",
    "            nn.Conv2d(384, 256, 3, 1, 1),\n",
    "            nn.ReLU(),\n",
    "            nn.MaxPool2d(3, 2)\n",
    "        )\n",
    "         # 这里全连接层的输出个数比LeNet中的大数倍。使用丢弃层来缓解过拟合\n",
    "        self.fc = nn.Sequential(\n",
    "            nn.Linear(256*5*5, 4096),\n",
    "            nn.ReLU(),\n",
    "            nn.Dropout(0.5),\n",
    "            #由于使用CPU镜像，精简网络，若为GPU镜像可添加该层\n",
    "            #nn.Linear(4096, 4096),\n",
    "            #nn.ReLU(),\n",
    "            #nn.Dropout(0.5),\n",
    "\n",
    "            # 输出层。由于这里使用Fashion-MNIST，所以用类别数为10，而非论文中的1000\n",
    "            nn.Linear(4096, 10),\n",
    "        )\n",
    "    #由于self.conv的输出是四维的（batchsize*channels*height*wigth），而self.fc的输入是二维的（batchsize*hiddens），forward用来进行转换\n",
    "    def forward(self, img):\n",
    "\n",
    "        feature = self.conv(img)\n",
    "        #img.shape[0]代表层数\n",
    "        output = self.fc(feature.view(img.shape[0], -1))\n",
    "        return output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "id": "237401462039416F8B5874A7DA7FD9F2",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AlexNet(\n",
      "  (conv): Sequential(\n",
      "    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))\n",
      "    (1): ReLU()\n",
      "    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
      "    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n",
      "    (4): ReLU()\n",
      "    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
      "    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (7): ReLU()\n",
      "    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (9): ReLU()\n",
      "    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (11): ReLU()\n",
      "    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
      "  )\n",
      "  (fc): Sequential(\n",
      "    (0): Linear(in_features=6400, out_features=4096, bias=True)\n",
      "    (1): ReLU()\n",
      "    (2): Dropout(p=0.5, inplace=False)\n",
      "    (3): Linear(in_features=4096, out_features=10, bias=True)\n",
      "  )\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "# 写完之后，可以通过print来看自己模型结构有没有写错\n",
    "net = AlexNet()\n",
    "print(net)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "EEF29AB29F37460C84C492FCE02FEF2D",
    "jupyter": {},
    "mdEditEnable": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "source": [
    "### 载入数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "id": "8994A770DF884A5D87F60737C50D0A1F",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "X = torch.Size([16, 1, 224, 224]) \n",
      "Y = tensor([5, 2, 9, 3, 1, 8, 3, 3, 2, 6, 1, 6, 2, 4, 4, 8], dtype=torch.int32)\n"
     ]
    }
   ],
   "source": [
    "# 本函数已保存在d2lzh_pytorch包中方便以后使用\n",
    "def load_data_fashion_mnist(batch_size, resize=None, root='/home/kesci/input/FashionMNIST2065'):\n",
    "    \"\"\"Download the fashion mnist dataset and then load into memory.\"\"\"\n",
    "    trans = []\n",
    "    if resize:\n",
    "        #因为图片的大小是不一致的，所以需要进行resize，resize大小就是把图片改成24*24\n",
    "        trans.append(torchvision.transforms.Resize(size=resize))\n",
    "    #将图片转换成tensor的形式\n",
    "    trans.append(torchvision.transforms.ToTensor())\n",
    "    \n",
    "    transform = torchvision.transforms.Compose(trans)\n",
    "    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)\n",
    "    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)\n",
    "\n",
    "    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=2)\n",
    "    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=2)\n",
    "\n",
    "    return train_iter, test_iter\n",
    "\n",
    "#batchsize=128\n",
    "batch_size = 16\n",
    "# 如出现“out of memory”的报错信息，可减小batch_size或resize\n",
    "train_iter, test_iter = load_data_fashion_mnist(batch_size,224)\n",
    "for X, Y in train_iter:\n",
    "    print('X =', X.shape,\n",
    "        '\\nY =', Y.type(torch.int32))\n",
    "    break\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "B3671547A0D04495B5B1F2ADE856500F",
    "jupyter": {},
    "mdEditEnable": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "source": [
    "### 训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "id": "FB33F7F4BEE1486187254524422FDF80",
    "jupyter": {},
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "lr, num_epochs = 0.001, 3\n",
    "optimizer = torch.optim.Adam(net.parameters(), lr=lr)\n",
    "d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "C40CCB5CDD8E4D3B9A6264B03CBEEB76",
    "jupyter": {},
    "mdEditEnable": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "source": [
    "#  使用重复元素的网络（VGG）\n",
    "VGG：通过重复使⽤简单的基础块来构建深度模型。  \n",
    "Block:数个相同的填充为1、窗口形状为$3\\times 3$的卷积层,接上一个步幅为2、窗口形状为$2\\times 2$的最大池化层。  \n",
    "卷积层保持输入的高和宽不变，而池化层则对其减半。下面的应该VGG block的最大池化层是2*2，图片输错了\n",
    "\n",
    "\n",
    "![Image Name](https://cdn.kesci.com/upload/image/q5l6vut7h1.png?imageView2/0/w/640/h/640)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "F3D1E7D0AC154E6A99842D587608E870",
    "jupyter": {},
    "mdEditEnable": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "source": [
    "### VGG11的简单实现"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "id": "AA4AB087A263436B90B719085FEABC67",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "\n",
    "def vgg_block(num_convs, in_channels, out_channels): #（一个vggblock里面的）卷积层个数，输入通道数，输出通道数\n",
    "    blk = []\n",
    "    for i in range(num_convs):\n",
    "        if i == 0:\n",
    "            blk.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))\n",
    "        else:\n",
    "            #除了第一层，后面的通道数都保持不变\n",
    "            blk.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1))\n",
    "        blk.append(nn.ReLU())\n",
    "    blk.append(nn.MaxPool2d(kernel_size=2, stride=2)) # 这里会使宽高减半\n",
    "    #最后通过sequential进行组合\n",
    "    return nn.Sequential(*blk)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "id": "3199FD641C1F417FA3C6B65784A20CF2",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "conv_arch = ((1, 1, 64), (1, 64, 128), (2, 128, 256), (2, 256, 512), (2, 512, 512)) #卷积层个数，输入通道数，输出通道数\n",
    "# 经过5个vgg_block, 宽高会减半5次, 变成 224/32 = 7\n",
    "fc_features = 512 * 7 * 7 # c * w * h\n",
    "fc_hidden_units = 4096 # 任意"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "id": "08AA6A69F80B4DAC8FA831C6E5C9260B",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "def vgg(conv_arch, fc_features, fc_hidden_units=4096):\n",
    "    net = nn.Sequential()\n",
    "    # 卷积层部分\n",
    "    for i, (num_convs, in_channels, out_channels) in enumerate(conv_arch):\n",
    "        # 每经过一个vgg_block都会使宽高减半\n",
    "        net.add_module(\"vgg_block_\" + str(i+1), vgg_block(num_convs, in_channels, out_channels))\n",
    "    # 全连接层部分，将全连接层串连\n",
    "    net.add_module(\"fc\", nn.Sequential(d2l.FlattenLayer(),#这一层是把上面的张量进行展平\n",
    "                                 nn.Linear(fc_features, fc_hidden_units),\n",
    "                                 nn.ReLU(),\n",
    "                                 nn.Dropout(0.5),\n",
    "                                 nn.Linear(fc_hidden_units, fc_hidden_units),\n",
    "                                 nn.ReLU(),\n",
    "                                 nn.Dropout(0.5),\n",
    "                                 nn.Linear(fc_hidden_units, 10)\n",
    "                                ))\n",
    "    return net"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "id": "A5FDDE9D6C404D868E3AB5E1731043D9",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "vgg_block_1 output shape:  torch.Size([1, 64, 112, 112])\n",
      "vgg_block_2 output shape:  torch.Size([1, 128, 56, 56])\n",
      "vgg_block_3 output shape:  torch.Size([1, 256, 28, 28])\n",
      "vgg_block_4 output shape:  torch.Size([1, 512, 14, 14])\n",
      "vgg_block_5 output shape:  torch.Size([1, 512, 7, 7])\n",
      "fc output shape:  torch.Size([1, 10])\n"
     ]
    }
   ],
   "source": [
    "net = vgg(conv_arch, fc_features, fc_hidden_units)\n",
    "X = torch.rand(1, 1, 224, 224)\n",
    "\n",
    "# named_children获取一级子模块及其名字(named_modules会返回所有子模块,包括子模块的子模块)\n",
    "for name, blk in net.named_children(): \n",
    "    X = blk(X)\n",
    "    print(name, 'output shape: ', X.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "id": "40A6F9278D034D4A874189A1F8F52ABF",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sequential(\n",
      "  (vgg_block_1): Sequential(\n",
      "    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (1): ReLU()\n",
      "    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
      "  )\n",
      "  (vgg_block_2): Sequential(\n",
      "    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (1): ReLU()\n",
      "    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
      "  )\n",
      "  (vgg_block_3): Sequential(\n",
      "    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (1): ReLU()\n",
      "    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (3): ReLU()\n",
      "    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
      "  )\n",
      "  (vgg_block_4): Sequential(\n",
      "    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (1): ReLU()\n",
      "    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (3): ReLU()\n",
      "    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
      "  )\n",
      "  (vgg_block_5): Sequential(\n",
      "    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (1): ReLU()\n",
      "    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (3): ReLU()\n",
      "    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
      "  )\n",
      "  (fc): Sequential(\n",
      "    (0): FlattenLayer()\n",
      "    (1): Linear(in_features=3136, out_features=512, bias=True)\n",
      "    (2): ReLU()\n",
      "    (3): Dropout(p=0.5, inplace=False)\n",
      "    (4): Linear(in_features=512, out_features=512, bias=True)\n",
      "    (5): ReLU()\n",
      "    (6): Dropout(p=0.5, inplace=False)\n",
      "    (7): Linear(in_features=512, out_features=10, bias=True)\n",
      "  )\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "ratio = 8\n",
    "#把里面的通道数都除以8。fashionmenist不需要太多通道，而且参数多也容易过拟合\n",
    "small_conv_arch = [(1, 1, 64//ratio), (1, 64//ratio, 128//ratio), (2, 128//ratio, 256//ratio), \n",
    "                   (2, 256//ratio, 512//ratio), (2, 512//ratio, 512//ratio)]\n",
    "net = vgg(small_conv_arch, fc_features // ratio, fc_hidden_units // ratio)\n",
    "print(net)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "id": "5325FD0A7DB64D43809A660129617282",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "batchsize=16\n",
    "#batch_size = 64\n",
    "# 如出现“out of memory”的报错信息，可减小batch_size或resize\n",
    "# train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)\n",
    "\n",
    "lr, num_epochs = 0.001, 5\n",
    "optimizer = torch.optim.Adam(net.parameters(), lr=lr)\n",
    "d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "7D83E04F316B4D0B933654E2F9FB08B9",
    "jupyter": {},
    "mdEditEnable": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "source": [
    "#  ⽹络中的⽹络（NiN） \n",
    "LeNet、AlexNet和VGG：先以由卷积层构成的模块充分抽取 空间特征，再以由全连接层构成的模块来输出分类结果。  \n",
    "NiN：串联多个由卷积层和“全连接”层构成的小⽹络来构建⼀个深层⽹络。这个“全连接层”指的是使用1×1的卷积核  \n",
    "⽤了输出通道数等于标签类别数的NiN块，然后使⽤全局平均池化层对每个通道中所有元素求平均并直接⽤于分类。  \n",
    "NiN是调整通道数来使得最后的通道数等于类别个数\n",
    "\n",
    "![Image Name](https://cdn.kesci.com/upload/image/q5l6u1p5vy.png?imageView2/0/w/960/h/960)\n",
    "\n",
    "1×1卷积核作用   \n",
    "1.放缩通道数：通过控制卷积核的数量达到通道数的放缩。  \n",
    "2.增加非线性。1×1卷积核的卷积过程相当于全连接层的计算过程，并且还加入了非线性激活函数，从而可以增加网络的非线性。  \n",
    "3.计算参数少   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "id": "88FB988FED1F4CB08417E01A3C560459",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "\n",
    "def nin_block(in_channels, out_channels, kernel_size, stride, padding):\n",
    "    blk = nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding),\n",
    "                        nn.ReLU(),\n",
    "                        nn.Conv2d(out_channels, out_channels, kernel_size=1),\n",
    "                        nn.ReLU(),\n",
    "                        nn.Conv2d(out_channels, out_channels, kernel_size=1),\n",
    "                        nn.ReLU())\n",
    "    return blk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "id": "AB1A33FE0F1C451F87F390FEC87D334E",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# 已保存在d2lzh_pytorch\n",
    "class GlobalAvgPool2d(nn.Module):\n",
    "    # 全局平均池化层可通过将池化窗口形状设置成输入的高和宽实现\n",
    "    def __init__(self):\n",
    "        super(GlobalAvgPool2d, self).__init__()\n",
    "    def forward(self, x):\n",
    "        return F.avg_pool2d(x, kernel_size=x.size()[2:])\n",
    "\n",
    "net = nn.Sequential(\n",
    "    nin_block(1, 96, kernel_size=11, stride=4, padding=0),\n",
    "    nn.MaxPool2d(kernel_size=3, stride=2),\n",
    "    nin_block(96, 256, kernel_size=5, stride=1, padding=2),\n",
    "    nn.MaxPool2d(kernel_size=3, stride=2),\n",
    "    nin_block(256, 384, kernel_size=3, stride=1, padding=1),\n",
    "    nn.MaxPool2d(kernel_size=3, stride=2), \n",
    "    nn.Dropout(0.5),\n",
    "    # 标签类别数是10\n",
    "    nin_block(384, 10, kernel_size=3, stride=1, padding=1),\n",
    "    GlobalAvgPool2d(), \n",
    "    # 将四维（最后的输出是 样本数，10,1,1 这四维）的输出转成二维（样本数，10）的输出，其形状为(批量大小, 10)\n",
    "    d2l.FlattenLayer())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "id": "08C2986CBEEF40F98825FC6FF163C572",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 output shape:  torch.Size([1, 96, 54, 54])\n",
      "1 output shape:  torch.Size([1, 96, 26, 26])\n",
      "2 output shape:  torch.Size([1, 256, 26, 26])\n",
      "3 output shape:  torch.Size([1, 256, 12, 12])\n",
      "4 output shape:  torch.Size([1, 384, 12, 12])\n",
      "5 output shape:  torch.Size([1, 384, 5, 5])\n",
      "6 output shape:  torch.Size([1, 384, 5, 5])\n",
      "7 output shape:  torch.Size([1, 10, 5, 5])\n",
      "8 output shape:  torch.Size([1, 10, 1, 1])\n",
      "9 output shape:  torch.Size([1, 10])\n"
     ]
    }
   ],
   "source": [
    "X = torch.rand(1, 1, 224, 224)\n",
    "for name, blk in net.named_children(): \n",
    "    X = blk(X)\n",
    "    print(name, 'output shape: ', X.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "id": "DBD61B6A08E1400996DCB60B4B570818",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "batch_size = 128\n",
    "# 如出现“out of memory”的报错信息，可减小batch_size或resize\n",
    "#train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)\n",
    "\n",
    "lr, num_epochs = 0.002, 5\n",
    "optimizer = torch.optim.Adam(net.parameters(), lr=lr)\n",
    "d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "AE1E233B008843DEB6E582ECFEE65617",
    "jupyter": {},
    "mdEditEnable": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "source": [
    "NiN重复使⽤由卷积层和代替全连接层的1×1卷积层构成的NiN块来构建深层⽹络。  \n",
    "NiN去除了容易造成过拟合的全连接输出层，而是将其替换成输出通道数等于标签类别数 的NiN块和全局平均池化层。   \n",
    "NiN的以上设计思想影响了后⾯⼀系列卷积神经⽹络的设计。  "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "7886126F79424D75863FB4B09025CE40",
    "jupyter": {},
    "mdEditEnable": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "source": [
    "# GoogLeNet\n",
    "1. 由Inception基础块组成。  \n",
    "2. Inception块相当于⼀个有4条线路的⼦⽹络。它通过不同窗口形状的卷积层和最⼤池化层来并⾏抽取信息，并使⽤1×1卷积层减少通道数从而降低模型复杂度。   \n",
    "3. 可以⾃定义的超参数是每个层的输出通道数，我们以此来控制模型复杂度。 \n",
    "\n",
    "![Image Name](https://cdn.kesci.com/upload/image/q5l6uortw.png?imageView2/0/w/640/h/640)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "id": "E9CBC745630F489CB69A0001B076F199",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "\n",
    "class Inception(nn.Module):\n",
    "    # c1 - c4为每条线路里的层的输出通道数\n",
    "    def __init__(self, in_c, c1, c2, c3, c4):\n",
    "        super(Inception, self).__init__()\n",
    "        # 线路1，单1 x 1卷积层\n",
    "        self.p1_1 = nn.Conv2d(in_c, c1, kernel_size=1)\n",
    "        # 线路2，1 x 1卷积层后接3 x 3卷积层\n",
    "        self.p2_1 = nn.Conv2d(in_c, c2[0], kernel_size=1)\n",
    "        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)\n",
    "        # 线路3，1 x 1卷积层后接5 x 5卷积层\n",
    "        self.p3_1 = nn.Conv2d(in_c, c3[0], kernel_size=1)\n",
    "        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)\n",
    "        # 线路4，3 x 3最大池化层后接1 x 1卷积层\n",
    "        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)\n",
    "        self.p4_2 = nn.Conv2d(in_c, c4, kernel_size=1)\n",
    "\n",
    "    def forward(self, x):\n",
    "        p1 = F.relu(self.p1_1(x))\n",
    "        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))\n",
    "        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))\n",
    "        p4 = F.relu(self.p4_2(self.p4_1(x)))\n",
    "        return torch.cat((p1, p2, p3, p4), dim=1)  # 在通道维上连结输出"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "E4C63967592040D9BD9D31D7078999DC",
    "jupyter": {},
    "mdEditEnable": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "source": [
    "### GoogLeNet模型\n",
    "完整模型结构  \n",
    "\n",
    "![Image Name](https://cdn.kesci.com/upload/image/q5l6x0fyyn.png?imageView2/0/w/640/h/640)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "id": "566939FB07C646D380806E2CA941FD85",
    "jupyter": {},
    "scrolled": false,
    "slideshow": {
     "slide_type": "slide"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "#b1模块左右是抽取特征，缩小大小的作用。图片输入的大小是1*96*96，尝试自己记录一下每经过一个层，其大小的变化。\n",
    "b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),#缩小宽和高\n",
    "                   nn.ReLU(),\n",
    "                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))\n",
    "\n",
    "b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),\n",
    "                   nn.Conv2d(64, 192, kernel_size=3, padding=1),#保持形状不改变。\n",
    "                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))\n",
    "\n",
    "b3 = nn.Sequential(Inception(192, 64, (96, 128), (16, 32), 32),\n",
    "                   Inception(256, 128, (128, 192), (32, 96), 64),\n",
    "                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))\n",
    "\n",
    "b4 = nn.Sequential(Inception(480, 192, (96, 208), (16, 48), 64),\n",
    "                   Inception(512, 160, (112, 224), (24, 64), 64),\n",
    "                   Inception(512, 128, (128, 256), (24, 64), 64),\n",
    "                   Inception(512, 112, (144, 288), (32, 64), 64),\n",
    "                   Inception(528, 256, (160, 320), (32, 128), 128),\n",
    "                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))\n",
    "\n",
    "b5 = nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128),\n",
    "                   Inception(832, 384, (192, 384), (48, 128), 128),\n",
    "                   d2l.GlobalAvgPool2d())\n",
    "\n",
    "net = nn.Sequential(b1, b2, b3, b4, b5, \n",
    "                    d2l.FlattenLayer(), nn.Linear(1024, 10))\n",
    "\n",
    "net = nn.Sequential(b1, b2, b3, b4, b5, d2l.FlattenLayer(), nn.Linear(1024, 10))\n",
    "\n",
    "X = torch.rand(1, 1, 96, 96)\n",
    "\n",
    "for blk in net.children(): \n",
    "    X = blk(X)\n",
    "    print('output shape: ', X.shape)\n",
    "\n",
    "#batchsize=128\n",
    "batch_size = 16\n",
    "# 如出现“out of memory”的报错信息，可减小batch_size或resize\n",
    "#train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)\n",
    "\n",
    "lr, num_epochs = 0.001, 5\n",
    "optimizer = torch.optim.Adam(net.parameters(), lr=lr)\n",
    "d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
